To increase the resolution of the scRNAseq, we utilized the dataset from Tasic et al, with over 43,000 genes and 23,000 cells.
liger_seqfish = createLiger(list(tasic = tasic, seqfish = seqfish))
liger_seqfish %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_seqfish %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_seqfish %<>% runTSNE()
names(liger_seqfish@clusters)=rownames(liger_seqfish@tsne.coords)
seqfish_purity = calcPurity(liger_seqfish, seq_clust)
tasic_purity = calcPurity(liger_seqfish, tasic_clust)
seqfish_ari = calcARI(liger_seqfish, seq_clust)
tasic_ari = calcARI(liger_seqfish, tasic_clust)
alignment = calcAlignment(liger_seqfish)
seqfish_stats = c(seqfish_purity, seqfish_ari, tasic_purity, tasic_ari, alignment)
by_dataset = plotByDatasetAndCluster(liger_seqfish, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_seqfish, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_seqfish, clusters = tasic_clust, return.plots = T)[[2]]
by_seqfish_cluster = plotByDatasetAndCluster(liger_seqfish, clusters = seq_clust, return.plots = T)[[2]]
seqfish_stats
## [1] 0.59674389 0.01777023 0.46746692 0.18135188 0.82197716
by_dataset
by_liger_cluster
by_tasic_cluster
by_seqfish_cluster
To directly compare the performance on the given seqfish data to a baseline dataset, the 160 gene STARmap dataset utilized in Wang and Allen et. al was downsampled to 120 genes and integrated with the scRNAseq dataset.
starmap_downsampled = starmap_160[sample(1:160, 120, replace = FALSE),]
liger_120 = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
liger_120 %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_120 %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_120 %<>% runTSNE()
names(liger_120@clusters)=rownames(liger_120@tsne.coords)
starmap_purity = calcPurity(liger_120, starmap_160_clust)
tasic_purity = calcPurity(liger_120, tasic_clust)
starmap_ari = calcARI(liger_120, starmap_160_clust)
tasic_ari = calcARI(liger_120, tasic_clust)
alignment = calcAlignment(liger_120)
starmap_stats = c(seqfish_purity, seqfish_ari, tasic_purity, tasic_ari, alignment)
by_dataset = plotByDatasetAndCluster(liger_120, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_120, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_120, clusters = tasic_clust, return.plots = T)[[2]]
by_starmap_cluster = plotByDatasetAndCluster(liger_120, clusters = starmap_clust, return.plots = T)[[2]]
starmap_stats
## [1] 0.59674389 0.01777023 0.74669213 0.50919582 0.70661001
by_dataset
by_liger_cluster
by_tasic_cluster
by_starmap_cluster
To determine the impact of the number of genes in spatial data, a dataset with a larger number of genes was utilized. The 1020 gene STARmap dataset utilized in Wang and Allen et. al was repeatedly downsampled and integrated with the much larger scRNAseq dataset.
starmap_downsampled = starmap_1020[sample(1:1020, 700, replace = FALSE),]
liger_700 = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
liger_700 %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_700 %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_700 %<>% runTSNE()
names(liger_700@clusters)=rownames(liger_700@tsne.coords)
starmap_purity = calcPurity(liger_700, starmap_clust)
tasic_purity = calcPurity(liger_700, tasic_clust)
starmap_ari = calcARI(liger_700, starmap_clust)
tasic_ari = calcARI(liger_700, tasic_clust)
alignment = calcAlignment(liger_700)
by_dataset = plotByDatasetAndCluster(liger_700, return.plots = T)[[1]]
by_liger_cluster = plotByDatasetAndCluster(liger_700, return.plots = T)[[2]]
by_tasic_cluster = plotByDatasetAndCluster(liger_700, clusters = tasic_clust, return.plots = T)[[2]]
by_starmap_cluster = plotByDatasetAndCluster(liger_700, clusters = starmap_clust, return.plots = T)[[2]]
c(starmap_purity, starmap_ari, tasic_purity, tasic_ari, alignment)
## [1] 0.5827338 0.3496803 0.8707543 0.6198895 0.8467468
by_dataset
by_liger_cluster
by_tasic_cluster
by_starmap_cluster
To account for the randomness associated with downsampling genes and the stochastic nature of our method, we repeated these analyses several times and took an average across runs.
stats_starmap = matrix(ncol = 6)
colnames(stats_starmap) = c("genes","purity_spatial","purity_rnaseq","ari_spatial","ari_rnaseq","alignment")
stats_seqfish = stats_starmap
for(n in 1:10){
for(i in seq(900, 100, -100)){
starmap_downsampled = starmap_1020[sample(1:1020, i, replace = FALSE),]
liger_down = createLiger(list(tasic = tasic, starmap = starmap_downsampled))
liger_down %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_down %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_down %<>% runTSNE()
names(liger_down@clusters)=rownames(liger_down@tsne.coords)
starmap_purity = calcPurity(liger_down, starmap_clust)
tasic_purity = calcPurity(liger_down, tasic_clust)
starmap_ari = calcARI(liger_down, starmap_clust)
tasic_ari = calcARI(liger_down, tasic_clust)
alignment = calcAlignment(liger_down)
stats_starmap = rbind(stats_starmap, c(i, starmap_purity, tasic_purity, starmap_ari, tasic_ari,alignment))
}
liger_seqfish = createLiger(list(tasic = tasic, seqfish = seqfish))
liger_seqfish %<>% normalize() %>% selectGenes() %>% scaleNotCenter()
liger_seqfish %<>% optimizeALS(k=20) %>% quantile_norm() %>% louvainCluster()
liger_seqfish %<>% runTSNE()
names(liger_seqfish@clusters)=rownames(liger_seqfish@tsne.coords)
starmap_purity = calcPurity(liger_seqfish, seq_clust)
tasic_purity = calcPurity(liger_seqfish, tasic_clust)
starmap_ari = calcARI(liger_seqfish, seq_clust)
tasic_ari = calcARI(liger_seqfish, tasic_clust)
alignment = calcAlignment(liger_seqfish)
stats_seqfish = rbind(stats, c(120, starmap_purity, tasic_purity, starmap_ari, tasic_ari,alignment))
}
stats_starmap = stats_starmap[2:nrow(stats_starmap),]
stats_seqfish = stats_seqfish[2:nrow(stats_seqfish),]
stats_starmap = stats[order(stats_starmap[,1]),]
Downsampling summary